Source code for hysop.tools.string_utils

# Copyright (c) HySoP 2011-2024
#
# This file is part of HySoP software.
# See "https://particle_methods.gricad-pages.univ-grenoble-alpes.fr/hysop-doc/"
# for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import re
from hysop import vprint
from hysop.tools.htypes import check_instance, to_tuple, first_not_None


def camel2snake(string):
    """
    Convert a camel case string to snake case.

    The conversion runs in two regex passes and lowercases the result:
    an underscore is first inserted before every capitalized word (an
    uppercase letter followed by lowercase letters) that is preceded by
    another character, then between any lowercase letter or digit that is
    directly followed by an uppercase letter.
    """
    # Pass 1: "<any char><Capitalized word>" -> "<any char>_<Capitalized word>"
    capitalized_word = re.compile("(.)([A-Z][a-z]+)")
    # Pass 2: "<lowercase or digit><Uppercase>" -> "<lowercase or digit>_<Uppercase>"
    lower_to_upper = re.compile("([a-z0-9])([A-Z])")

    after_first_pass = capitalized_word.sub(r"\1_\2", string)
    after_second_pass = lower_to_upper.sub(r"\1_\2", after_first_pass)
    return after_second_pass.lower()
def prepend(string, prefix):
    """
    Put prefix in front of every non-empty line of string.

    Lines are delimited by "\\n". Empty lines are left untouched, so blank
    lines and a trailing newline never receive the prefix.
    """
    return "\n".join(
        (prefix + line) if line else line for line in string.split("\n")
    )
def vprint_banner(msg, c="*", spacing=False, at_border=0):
    """
    Print a message preceded and succeeded by separation lines.

    msg:       message to print, may span several lines separated by "\\n".
    c:         string used to build the separation lines and side borders.
    spacing:   when True, an empty vprint() call is issued before and after
               the banner.
    at_border: number of repetitions of c placed on each side of every
               message line.

    Every message line is right-padded with spaces up to the length of the
    longest one, so that the side borders line up. All output goes through
    vprint.
    """
    rows = msg.split("\n")
    width = max(map(len, rows))

    if spacing:
        vprint()

    # The separation line spans the padded message plus both side borders.
    rule = c * (width + 2 * at_border)
    side = c * at_border

    vprint(rule)
    for row in rows:
        vprint(side + row.ljust(width) + side)
    vprint(rule)

    if spacing:
        vprint()
def framed_str(title, msg, c="=", at_border=2):
    """
    Format a message to fit between two separation lines, the first one
    containing a title.

    title:     text embedded in the header line.
    msg:       message body, may span several lines separated by "\\n".
    c:         string used to draw the header and the footer.
    at_border: number of repetitions of c placed on each side of the title.

    The header is the decorated title, extended with c up to the width of
    the longest message line. Non-ASCII characters are not counted when
    measuring message lines. The footer repeats c once per character of
    the header.

    Returns the string "header\\nmsg\\nfooter".
    """

    def ascii_width(line):
        # Characters outside the 7-bit ASCII range are dropped before counting.
        return len(re.sub(r"[^\x00-\x7f]", "", line))

    body_width = max(ascii_width(line) for line in msg.split("\n"))

    border = c * at_border
    decorated_title = border + title + border
    filler = c * max(0, body_width - len(decorated_title))

    header = decorated_title + filler
    footer = c * len(header)
    return "\n".join((header, msg, footer))
def strlen(s):
    """
    Return the length of s as given by len().

    No character substitution is performed: every character of s, ASCII or
    not, counts for one.
    """
    nchars = len(s)
    return nchars
def multiline_split(strdata, maxlen, split_sep, replace, newline_prefix=None):
    """
    Utility function to split one line of a column representation of
    string data into smaller pieces:

    Input data (strdata):
        (s0, s1, s2, s3)

    Output data:
        [
            (s0.0, s1.0, s2.0, s3.0),
            (s0.1, ----, s2.1, s3.1),
            (----, ----, s2.2, ----),
            (----, ----, s2.3, ----),
        ]

    Splitting rules (each argument is a tuple with one entry per column):
        maxlen:         length in characters at which a line of the column
                        is considered full, or None to never split the
                        column.
        split_sep:      separator (str) or tuple of separators at which the
                        column may be broken. Mandatory and non-empty for
                        every column whose maxlen is not None.
        replace:        filler used for the columns that produced fewer
                        lines than the longest one (here ----).
        newline_prefix: prefix put at the start of every continuation line
                        of the column. Defaults to no prefix; a None entry
                        also means no prefix.

    A column is only ever broken right after a separator, and pieces are
    appended to a line until it reaches maxlen, so a line can be longer
    than maxlen. Every output line consumes at least one piece, which
    guarantees termination even when maxlen is not positive or when the
    prefix alone is already maxlen characters long.

    Returns a list of rows, each of the same type as strdata. When nothing
    has to be split the result is [strdata].

    Raises AssertionError when a column has a maxlen but no usable
    separator. Argument types and sizes are validated with check_instance
    (presumably raising on mismatch -- see hysop.tools.htypes).
    """
    check_instance(strdata, tuple, values=str)
    ndata = len(strdata)
    newline_prefix = first_not_None(newline_prefix, ("",) * ndata)
    check_instance(maxlen, tuple, values=(type(None), int), size=ndata)
    check_instance(split_sep, tuple, values=(type(None), str, tuple), size=ndata)
    check_instance(newline_prefix, tuple, values=(type(None), str, tuple), size=ndata)
    check_instance(replace, tuple, values=str, size=ndata)

    # Nothing to do when no column has a length limit.
    if all(ml is None for ml in maxlen):
        return [strdata]

    # Normalize the separators to one tuple of separators per column.
    separators = []
    for ml, ss in zip(maxlen, split_sep):
        if ml is None:
            separators.append(())
            continue
        assert ss is not None, "maxlen specified but separator was not specified."
        assert (ss != "") and (
            ss != ()
        ), "maxlen specified but separator was not specified."
        separators.append(to_tuple(ss))

    columns = []
    for text, ml, seps, nlp in zip(strdata, maxlen, separators, newline_prefix):
        if (ml is None) or strlen(text) < ml:
            columns.append([text])
            continue

        # A None prefix passes the validation above; treat it as "no prefix".
        if nlp is None:
            nlp = ""

        # Cut the text at every separator, keeping each separator attached
        # to the piece that precedes it so that "".join(pieces) == text.
        pieces = [text]
        for sep in seps:
            refined = []
            for chunk in pieces:
                parts = chunk.split(sep)
                refined.extend(part + sep for part in parts[:-1])
                refined.append(parts[-1])
            pieces = refined

        # Greedily glue pieces back together, one output line at a time.
        lines = []
        line = ""
        pos, npieces = 0, len(pieces)
        while pos < npieces:
            # Always consume one piece first: without this, a line whose
            # initial content is already >= ml would never make progress.
            line += pieces[pos]
            pos += 1
            while pos < npieces and strlen(line) < ml:
                line += pieces[pos]
                pos += 1
            lines.append(line)
            line = nlp
        columns.append(lines)

    nsplits = max(len(col) for col in columns)
    if nsplits == 1:
        return [strdata]

    # Pad the shorter columns with their filler so all have nsplits lines.
    padded = [
        col + [replace[i]] * (nsplits - len(col)) for i, col in enumerate(columns)
    ]

    # Transpose: one output row per line index, same container type as input.
    row_type = type(strdata)
    return [row_type(col[k] for col in padded) for k in range(nsplits)]